#!/usr/bin/env python
#
# Copyright (c) 2009, Jesse Kempf
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# - Redistributions of source code must retain the above copyright notice,
#     this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
# - Neither the names of the author nor the names of its contributors
#     may be used to endorse or promote products derived from this software
#     without specific prior written permission.
#
#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
#  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
#  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
#  OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
#  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
#  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
#  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
#  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
#  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import collections
import optparse
import operator
import os
import re
import sys

# Group names that are treated as always defined: they are the builtin
# set for CFGroup and are appended to the defined-group list in main()
# before orphan detection, so references to them are never reported.
# NOTE(review): presumably a subset of CFEngine's platform "hard classes";
# extend as needed.
CFENGINE_HARD_GROUPS = (
	'any', 'freebsd', 'solaris', 'linux', 'redhat', 'gentoo', 'debian',
	'ubuntu'
)

def err(str):
	"""Write a message, followed by a newline, to standard error.

	str -- the message to emit; it is formatted with ``%s`` so any
	       object with a string representation is accepted.  The
	       parameter name shadows the builtin but is kept unchanged so
	       existing callers keep working.
	"""
	# sys.stderr.write() behaves the same on Python 2 and Python 3, while
	# the ``print >> stream`` statement form only works on Python 2.
	# Wrapping the argument in a 1-tuple keeps a tuple message from being
	# misread as a list of format arguments.
	sys.stderr.write("%s\n" % (str,))

class CFEntity(object):
	"""A named entity together with the file it was seen in.

	Subclasses override the two class attributes below to list names
	that should never be reported:

	ignore  -- names that are deliberately not checked
	builtin -- names that are always considered to exist
	"""
	ignore = ()
	builtin = ()

	def __init__(self, file, name):
		"""Remember the entity's name and the file that mentions it."""
		self.name = name
		self.file = file

	def __str__(self):
		"""Describe the entity as "<ClassName> '<name>' referenced in <file>"."""
		kind = type(self).__name__
		return "%s '%s' referenced in %s" % (kind, self.name, self.file)

	@property
	def skip(self):
		"""Return a new list: the ignored names followed by the builtin names."""
		names = list(self.ignore)
		names.extend(self.builtin)
		return names

class CFVariable(CFEntity):
	"""A variable, either defined or referenced in a file.

	Both tuples below end up in ``skip``, so the names they contain are
	never reported as undefined or as multiply defined.
	"""
	# Names that are always considered to exist.
	builtin = ('tab', 'host')
	# Names that are deliberately left unchecked.
	ignore = ('actionsequence',)

# A group, either defined or referenced in a file.  The names in
# CFENGINE_HARD_GROUPS are its builtin set, so they are part of ``skip``
# and are never reported.
class CFGroup(CFEntity):
	builtin = CFENGINE_HARD_GROUPS

class AdditiveDict(dict):
	"""A dict whose ``+`` operator merges two mappings key by key."""

	def __add__(self, rhs):
		"""Return a new AdditiveDict combining this mapping with rhs.

		Keys present in both operands map to ``self[key] + rhs[key]``;
		keys present in only one operand keep that operand's value (the
		value object itself is shared, not copied).  Neither operand is
		modified.
		"""
		# Start from a shallow copy of the left operand and fold the right
		# one in.  This avoids ``self.keys() + rhs.keys()``, which only
		# works while keys() returns a list (Python 2); on Python 3 the
		# key views cannot be concatenated.
		merged = AdditiveDict(self)

		for key in rhs:
			if key in merged:
				merged[key] = merged[key] + rhs[key]
			else:
				merged[key] = rhs[key]

		return merged

def find_group_defines(filename, contents):
	"""Yield a CFGroup for every group name that the given lines define.

	filename -- name recorded on each yielded CFGroup
	contents -- iterable of lines; section keywords are only recognised
	            at the very start of a line, so callers should strip
	            leading whitespace first

	Three kinds of definition are recognised:

	* ``name = ...`` lines inside a ``groups:`` section,
	* ``DefineClasses``, ``ElseDefineClasses`` and ``DefineInGroup`` lines
	  whose quoted argument is a colon-separated list of names,
	* ``define=a:b`` and ``elsedefine=a:b`` attributes on any other line.

	Lines of the second kind without a quoted argument produce a warning
	on standard error and are skipped.  Empty names are never yielded.
	"""
	mode = ''

	for line in contents:
		if line.startswith('groups:'):
			mode = 'G'
			line = line.replace('groups:', '')

		if mode == 'G':
			if '=' in line:
				gname = line.split('=', 1)[0]
				yield CFGroup(filename, gname.strip())
			# There are three possible reasons why there is no '=':
			# 1: We've moved on to a new action, like editfiles
			# 2: We've hit a group-conditional statement
			# 3: We're in the middle of a multi-line class definition
			# Only the first one (a single trailing colon) ends the section.
			#
			# XXX: We do not yet take care of the instance where
			# 	there's a class-and-statement one liner like
			#	someclass:: newclass = ( TruthFunc(arg) ).
			elif re.match(r'[a-zA-Z]+:(?!:)', line):
				mode = ''

		elif line.startswith(('DefineClasses', 'ElseDefineClasses',
								'DefineInGroup')):
			# Take the first quoted string, whichever quote character opens
			# it; extra quotes later on the line no longer abort the scan.
			quoted = re.search(r"""(['"])(.*?)\1""", line)
			if quoted is None:
				err("Warning: malformed line: %s" % line)
				continue

			for gname in quoted.group(2).split(':'):
				gname = gname.strip()
				if gname:
					yield CFGroup(filename, gname)

		elif 'define=' in line:
			# One pattern covers both define= and elsedefine=, so each
			# attribute is reported exactly once regardless of their order
			# on the line.  The value runs up to the next whitespace of any
			# kind (space or tab).
			for attr in re.finditer(r'(?:else)?define=(\S+)', line):
				for gname in attr.group(1).split(':'):
					gname = gname.strip()
					if gname:
						yield CFGroup(filename, gname)

def find_group_consumers(fname, contents):
	"""Yield a CFGroup for every group named in a ``guard::`` expression.

	fname    -- name recorded on each yielded CFGroup
	contents -- iterable of lines; a guard is only recognised at the very
	            start of a line, so callers should strip leading
	            whitespace first

	A guard is a run of group names joined by the operators ``.``, ``&``,
	``|``, ``!`` and parentheses, terminated by ``::``.  Each name in the
	guard is yielded separately; nothing after the first ``::`` is looked
	at.
	"""
	# Names may contain letters, digits and underscores.  The character
	# class spells this out instead of the former ``A-z`` range, which also
	# matched the punctuation between 'Z' and 'a', and it includes '!' and
	# parentheses so negated or grouped guards are recognised too.
	guard_re = re.compile(r'[a-zA-Z0-9_.&|!()]+::')

	for line in contents:
		if not guard_re.match(line):
			continue

		# Split once only: text after the guard may itself contain '::'.
		guard = line.split('::', 1)[0]

		# normalize out any logical operators
		for oper in ('.', '&', '|', '!', '(', ')'):
			guard = guard.replace(oper, ' ')

		# split() without arguments never produces empty strings
		for group in guard.split():
			yield CFGroup(fname, group)

def find_variable_defines(fname, contents):
	"""Yield a CFVariable for each ``name = ...`` line in a control: section.

	fname    -- name recorded on each yielded CFVariable
	contents -- iterable of lines; the ``control:`` keyword is only
	            recognised at the very start of a line

	The section ends at the first line without '=' that starts with a
	word followed by a single colon (the next action keyword).
	"""
	in_control = False

	for text in contents:
		if text.startswith('control:'):
			in_control = True
			text = text.replace('control:', '')

		if not in_control:
			continue

		(name, equals, _) = text.partition('=')
		if equals:
			# Everything left of the first '=' is the variable name.
			yield CFVariable(fname, name.strip())
		elif re.match(r'[a-zA-Z]+:(?!:)', text):
			# A new action keyword closes the control section.
			in_control = False

def find_variable_consumers(fname, contents):
	"""Yield a CFVariable for each ``${name}`` or ``$(name)`` reference.

	fname    -- name recorded on each yielded CFVariable
	contents -- iterable of lines

	Square brackets are replaced by spaces before matching.  For every
	line, all brace-style references are yielded before any
	parenthesis-style ones.
	"""
	reference_patterns = (
		r'\$\{([a-zA-Z_]+)\}',
		r'\$\(([a-zA-Z_]+)\)',
	)

	for raw in contents:
		text = raw.replace('[', ' ').replace(']', ' ')

		for pattern in reference_patterns:
			for found in re.finditer(pattern, text):
				yield CFVariable(fname, found.group(1))

def parseCF(fname, contents):
	"""Scan the lines of one file and collect everything it defines or uses.

	fname    -- name recorded on every entity found
	contents -- iterable of lines; each one is stripped of surrounding
	            whitespace before scanning

	Returns an AdditiveDict with four list-valued keys: 'groups_defined',
	'groups_consumed', 'variables_defined' and 'variables_consumed'.
	"""
	stripped = [raw.strip() for raw in contents]

	groups_defined = list(find_group_defines(fname, stripped))
	groups_consumed = list(find_group_consumers(fname, stripped))
	variables_defined = list(find_variable_defines(fname, stripped))
	variables_consumed = list(find_variable_consumers(fname, stripped))

	result = AdditiveDict()
	result['groups_defined'] = groups_defined
	result['groups_consumed'] = groups_consumed
	result['variables_defined'] = variables_defined
	result['variables_consumed'] = variables_consumed
	return result

def find_multiple_definitions(entities):
	"""Print a warning for every name that is defined in more than one file.

	entities -- iterable of objects with ``name``, ``file`` and ``skip``
	            attributes (CFEntity instances)

	Names listed in the ``skip`` attribute of their first definition are
	not reported.  Warnings are printed in name order and the files for
	each name in sorted order, so the output is reproducible.
	"""
	entities_defined = collections.defaultdict(list)

	for ent in entities:
		entities_defined[ent.name].append(ent)

	for name in sorted(entities_defined):
		ents = entities_defined[name]
		def_files = set(ent.file for ent in ents)

		if len(def_files) > 1 and name not in ents[0].skip:
			# Calling print with a single parenthesised argument gives the
			# same output on Python 2 and Python 3.
			print("Warning: %s %s declared in:" % (type(ents[0]).__name__, name))
			for f in sorted(def_files):
				print("\t%s" % f)

def find_orphans(entities, defines):
	"""Print a warning for every entity whose name has no definition.

	entities -- iterable of objects with ``name`` and ``skip`` attributes
	            (CFEntity instances); each is printed via its string form
	defines  -- iterable of defined names

	Entities whose name appears in their own ``skip`` list are never
	reported.
	"""
	# Build the lookup set once instead of scanning a list per entity.
	defined = set(defines)

	for ent in entities:
		if ent.name not in defined and ent.name not in ent.skip:
			# Calling print with a single parenthesised argument gives the
			# same output on Python 2 and Python 3.
			print("Warning: Undefined %s" % (ent,))

def main(opts, fnames):
	"""Parse every named file and print warnings about what they contain.

	opts   -- parsed command-line options (currently unused)
	fnames -- iterable of paths of the files to check

	Reports groups and variables that are defined in more than one file
	and groups and variables that are referenced but never defined.
	Does nothing when no files are given.
	"""
	# reduce() is a builtin only on Python 2; functools provides it on
	# Python 2.6+ and Python 3 alike.
	from functools import reduce

	inputs = []
	for fname in fnames:
		# parseCF reads every line before returning, so the file can be
		# closed as soon as it is done.
		with open(fname) as handle:
			inputs.append(parseCF(fname, handle))

	if not inputs:
		# reduce() without an initial value raises TypeError on an empty
		# sequence; with nothing parsed there is nothing to report.
		return

	inputs = reduce(operator.add, inputs)

	# Check for multiply-defined groups, which is an indicator that
	# CFEngine scripts might be trampling each other.

	find_multiple_definitions(inputs['groups_defined'])
	find_orphans(inputs['groups_consumed'],
					[x.name for x in inputs['groups_defined']]
						+ list(CFENGINE_HARD_GROUPS)
	)

	find_multiple_definitions(inputs['variables_defined'])
	find_orphans(inputs['variables_consumed'],
					[x.name for x in inputs['variables_defined']])

if __name__ == '__main__':
	# Command-line entry point: every positional argument is a file to check.
	op = optparse.OptionParser(usage='%prog [options] <files>')
	(opts, args) = op.parse_args()

	fnames = args

	# A plain assignment never raises ValueError, so test for missing file
	# arguments explicitly and exit with the usage message.
	if not fnames:
		err("%s" % op.get_usage().strip())
		sys.exit(os.EX_USAGE)

	main(opts, fnames)
